package com.symria.system.test;


import java.util.ArrayList;  
import java.util.List;  
import java.util.regex.Matcher;  
import java.util.regex.Pattern;  

import com.symria.framework.util.HttpClientUtils;

/**
 * Small regex-based helpers for pulling fragments out of HTML text.
 *
 * <p>These helpers use regular expressions rather than a real HTML parser, so
 * they are only suitable for simple, well-formed markup. For example, an
 * attribute-like sequence inside another attribute's quoted value can produce
 * a false match.
 */
public class MatchHtmlElementAttrValue {

    /**
     * Matches a {@code <table ... class=...> ... </table>} element.
     * Group 1 is the whole element, group 2 is its trimmed inner content.
     * No DOTALL flag is set, so {@code .} does not match line terminators and
     * a table whose content spans several lines is not matched.
     */
    private static final Pattern TABLE_WITH_CLASS = Pattern.compile(
            "(<\\s*table\\s.*?class\\s*=\\s*[^>]*\\s*>\\s*(.*?)\\s*<\\s*/\\s*table\\s*>)");

    /**
     * Collects the values of the given attribute on every occurrence of the
     * given HTML element in {@code source}.
     *
     * <p>Element and attribute names are matched case-insensitively. The value
     * may be double-quoted, single-quoted or unquoted; surrounding quotes are
     * not included in the result.
     *
     * @param source  the text to search; {@code null} yields an empty list
     * @param element the tag name, e.g. {@code "div"}
     * @param attr    the attribute name, e.g. {@code "class"}
     * @return the attribute values in document order; never {@code null}
     * @throws IllegalArgumentException if {@code element} or {@code attr} is
     *                                  {@code null} or empty
     */
    public static List<String> match(String source, String element, String attr) {
        if (element == null || element.isEmpty() || attr == null || attr.isEmpty()) {
            throw new IllegalArgumentException("element and attr must be non-empty");
        }
        List<String> result = new ArrayList<String>();
        if (source == null) {
            return result;
        }

        // Names are quoted so regex metacharacters in them are taken literally.
        // The (?=\s) lookahead stops "a" from matching "<abbr ...>" while
        // leaving the whitespace available for the "\s" in front of the
        // attribute name.
        String regex = "<\\s*" + Pattern.quote(element) + "(?=\\s)[^<>]*?\\s"
                + Pattern.quote(attr)
                + "\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'<>]+))";

        Matcher m = Pattern.compile(regex, Pattern.CASE_INSENSITIVE).matcher(source);
        while (m.find()) {
            // Exactly one of the three alternatives captured the value.
            String value = m.group(1);
            if (value == null) {
                value = m.group(2);
            }
            if (value == null) {
                value = m.group(3);
            }
            result.add(value);
        }
        return result;
    }

    /**
     * Returns the last {@code <table>} element in {@code source} that carries
     * a {@code class} attribute, including its opening and closing tags.
     *
     * <p>Despite its name, this method does not extract a domain; the name is
     * kept so existing callers continue to compile.
     *
     * @param source the text to search; {@code null} yields {@code null}
     * @return the text of the last matching table element, or {@code null}
     *         if there is none
     */
    public static String getDomain(String source) {
        if (source == null) {
            return null;
        }
        String lastTable = null;
        Matcher matcher = TABLE_WITH_CLASS.matcher(source);
        while (matcher.find()) {
            // Each later match overwrites the earlier one, so the last wins.
            lastTable = matcher.group(1);
        }
        return lastTable;
    }

    /**
     * Demo entry point: prints the table element found in a fixed HTML sample.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // The link text is written with Unicode escapes so that the file
        // compiles the same way regardless of the source encoding in use.
        String source = "<html><table class=\"small-tbimg\" cellspacing=\"0\" cellpadding=\"0\">"
                + "<a href=\"#\">\u6211\u7684\u4e16\u754c</a>  aaaaaaaaaaa</table></html>";

        System.out.println(getDomain(source));
    }
}
